*** Generate and prepare/clean datasets for split samples

** Randomly assign every person (persnr) observed in 1997-2019 to split sample 1 or 2
	* Source dataset was generated in values_akm_master.do; the year window is hard-coded here
	use persnr year if inrange(year,1997,2019) using "$temp\cleaned_obs_all.dta", clear
	keep persnr
	duplicates drop // one row per person
	set seed 666 // fixes the random-number state used by runiform() below
	gen uni = runiform()
	gen samp = 1 + (uni >= .5) // uni < .5 -> split 1, uni >= .5 -> split 2
	drop uni
	save "$temp/pers_samp_id.dta", replace
	clear
	
*** Generate different datasets (spanning subsets of years) using loop
** Step 1: Identify moves
* Build the list of window codes: (center year) times 10 plus (window length in years).
* A window is kept only if its first year is after 1997 and its last year is before 2019.
local i_list = ""
foreach tw in 3 7 {
	local half = (`tw' - 1)/2 // number of years on each side of the center year
	forvalues y = 1997/2019 {
		if `y' - `half' > 1997 & `y' + `half' < 2019 {
			local i_list "`i_list' `=`y'*10+`tw''"
		}
	}
}
display "`i_list'"
* Run the move-identification step once per window code (plus code 3) and per split sample
foreach i in `i_list' 3 {
	forvalues s = 1/2 {
		global ver = 10 * `i' + `s' // version id: window code followed by the split number
		display as text "ver = $ver"
		do "Set globals.do"
		global s = `s'
		* Generate mover files
		capture log close
		log using "$log\14_01_identify_moves_BeH_sample_selection_ver${ver}_$S_DATE", text replace
		** Input:
		* "$data/cleaned_obs_all.dta"
		** Output:
		* "$data/cleaned_obs_ver$ver.dta" split 1/2
		* "$data/cleaned_obs_reconfig_hockey_ver$ver.dta" split 1/2
		* "$data/movers_ver$ver" 1/2
		* "$data/workers_currentid_year_ver$ver.dta" split 1/2
		do "$prog\14_01_identify_moves_BeH_sample_selection.do"
		clear
		capture log close
	}
}
** Step 2: Generate hockey sticks and generate datasets for Matlab
* For every window code in i_list (plus code 3) and each split sample, run
* 05_mover_inputs_BeH and then 06_str_conn_input_BeH, each with its own log file.
* Relies on the local i_list built before Step 1 in the same do-file run.
foreach i in `i_list' 3 {
	forv s = 1/2 {
		global ver = `i' * 10 + `s'
		dis as text "ver = $ver"
		do "Set globals.do"
		* Generate hockey sticks
		cap log close
		* Braces around ver are required: without them Stata reads the macro name as
		* ver_ (underscore is a valid name character), which is undefined, so the
		* version id dropped out of the log name and every run overwrote the same log.
		log using "$log\05_mover_inputs_BeH_ver${ver}_$S_DATE", text replace
		** Input:
		* "$data/cleaned_obs_reconfig_hockey_ver$ver.dta"
		** Output:
		* "$data/move_rates_growth_raw_ver$ver.dta"
		* "$data/estab_eeenrates_byyr_ver$ver.dta"
		* "$graph/hockey_stick_cut_ver$ver.pdf"
		* "$graph/hockey_stick_ver$ver.pdf"
		do "$prog\05_mover_inputs_BeH"
		clear
		cap log close
		* Generate datasets for Matlab (to find connected sets)
		log using "$log\06_str_conn_input_BeH_ver${ver}_$S_DATE", text replace
		** Input:
		* "$data/movers_ver$ver.dta"
		* "$data/estab_eeenrates_byyr_ver$ver.dta"
		* "$data/estab_matrix_ids_ver$ver.dta"
		** Output:
		* "$data/str_conn_input_ver$ver.dta"
		* "$data/estab_matrix_ids_ver$ver.dta" is used within 04_
		* "$data/estab_matrix_ids_orig_ver$ver.dta"
		* "$data/restricted_senders_ver$ver.dta"
		* "$matlab\sconn_idfy\flows_ver$ver.csv"
		do "$prog\06_str_conn_input_BeH"
		clear
		cap log close
	}
}

* Deliberate halt before the manual Matlab step: stop is not defined in this file,
* so (unless a user-written stop command is installed) Stata aborts the do-file here.
stop



********************************************************************************
********************************************************************************
********************************************************************************
*																			   *
* Run the following file in Matlab to find connected sets: sconn_idfy2s.m       *
* After running Matlab, continue here:										   *
*																			   *
********************************************************************************
********************************************************************************
********************************************************************************



*** Generate files to estimate (main) Sorkin model
* Rebuild the list of window codes: (center year) times 10 plus (window length).
* A window is kept only if its first year is after 1997 and its last year is before 2019.
local i_list = ""
foreach tw in 3 7 {
	forval y = 1997(1)2019 {
		if `y'-(`tw'-1)/2 > 1997 & `y'+(`tw'-1)/2 < 2019 {
			local addto = `y'*10 + `tw'
			local i_list "`i_list' `addto'"
		}
	}
}
dis "`i_list'"
* Run 07_model_inputs_BeH once per window code (plus code 3) and per split sample
foreach i in `i_list' 3 {
	forv s = 1/2 {
		global ver = `i' * 10 + `s'
		dis as text "ver = $ver"
		do "Set globals.do"
		capture log close
		* Braces around ver are required: without them Stata reads the macro name as
		* ver_ (underscore is a valid name character), which is undefined, so the
		* version id dropped out of the log name and every run overwrote the same log.
		log using "$log/07_model_inputs_BeH_ver${ver}_$S_DATE", text replace
		** Input:
		* "$matlab\sconn_idfy\sconn_idfy_ver$ver.csv"
		* "$data/estab_matrix_ids_orig_ver$ver.dta"
		* "$data/str_conn_input_ver$ver.dta"
		* "$data/estab_eeenrates_byyr_ver$ver.dta"
		* "$data\workers_currentid_year_ver$ver.dta"
		* "$temp/wz.dta"
		** Output:
		* "$data/estab_matrix_ids_ver$ver.dta"
		* "$data/fo_new_ver$ver.dta"
		* "$data/model_input_new_ver$ver.dta"
		* "$matlab\data_new\connected_moves_ver$ver.csv"
		* "$matlab\data_new\params_ver$ver.csv"
		do "$prog\07_model_inputs_BeH"
		clear
		cap log close
	}
}

* Deliberate halt before the manual Matlab step: stop is not defined in this file,
* so (unless a user-written stop command is installed) Stata aborts the do-file here.
stop



********************************************************************************
********************************************************************************
********************************************************************************
*																			   *
* Run the following file in Matlab: estimate_model_v7s.m 					   *
* After running Matlab, continue here:										   *
*																			   *
********************************************************************************
********************************************************************************
********************************************************************************



*** Generate alternative connected sets based on employer-skill combinations
* Window codes: (center year) times 10 plus (window length); a window is kept only
* if its first year is after 1997 and its last year is before 2019.
local i_list = ""
foreach tw in 3 7 {
	local half = (`tw' - 1)/2 // number of years on each side of the center year
	forvalues y = 1997/2019 {
		if `y' - `half' > 1997 & `y' + `half' < 2019 {
			local i_list "`i_list' `=`y'*10+`tw''"
		}
	}
}
display "`i_list'"
* Run 08_str_conn_input_BeH_skill once per window code (plus code 3) and per split sample
foreach i in `i_list' 3 {
	forvalues s = 1/2 {
		global ver = 10 * `i' + `s' // version id: window code followed by the split number
		display as text "ver = $ver"
		do "Set globals.do"
		* Generate datasets for Matlab
		capture log close
		log using "$log\08_str_conn_input_BeH_skill_ver${ver}_$S_DATE", text replace
		** Input:
		* "$data/movers_ver$ver.dta"
		* "$data/estab_eeenrates_byyr_ver$ver.dta"
		* "$data/estab_matrix_ids_ver$ver.dta"
		** Output:
		* "$data/str_conn_input_skill_ver$ver.dta"
		* "$data/estab_matrix_ids_skill_ver$ver.dta" is used within 04_
		* "$data/estab_matrix_ids_orig_skill_ver$ver.dta"
		* "$data/restricted_senders_skill_ver$ver.dta"
		* "$matlab\sconn_idfy\flows_skill_ver$ver.csv"
		do "$prog\08_str_conn_input_BeH_skill"
		clear
		capture log close
	}
}

* Halt here so the Matlab step can be run by hand before continuing
stop



********************************************************************************
********************************************************************************
********************************************************************************
*																			   *
* Run the following file in Matlab to find connected sets: sconn_idfy2s_skill.m *
* After running Matlab, continue here:										   *
*																			   *
********************************************************************************
********************************************************************************
********************************************************************************



*** Generate files to estimate Sorkin model for employer-skill combinations
* Window codes: (center year) times 10 plus (window length); a window is kept only
* if its first year is after 1997 and its last year is before 2019.
local i_list = ""
foreach tw in 3 {  // did not run for 7
	local half = (`tw' - 1)/2 // number of years on each side of the center year
	forvalues y = 1997/2019 {
		if `y' - `half' > 1997 & `y' + `half' < 2019 {
			local i_list "`i_list' `=`y'*10+`tw''"
		}
	}
}
display "`i_list'"
* Run 09_model_inputs_BeH_skill once per window code (plus code 3) and per split sample
foreach i in `i_list' 3 {
	forvalues s = 1/2 {
		global ver = 10 * `i' + `s' // version id: window code followed by the split number
		display as text "ver = $ver"
		do "Set globals.do"
		capture log close
		log using "$log/09_model_inputs_BeH_skill_ver${ver}_$S_DATE", text replace
		* NOTE(review): the file names listed below carry no skill suffix; confirm
		* whether they describe what 09_model_inputs_BeH_skill actually reads and writes.
		** Input:
		* "$matlab\sconn_idfy\sconn_idfy_ver$ver.csv"
		* "$data/estab_matrix_ids_orig_ver$ver.dta"
		* "$data/str_conn_input_ver$ver.dta"
		* "$data/estab_eeenrates_byyr_ver$ver.dta"
		* "$data\workers_currentid_year_ver$ver.dta"
		* "$temp/wz.dta"
		** Output:
		* "$data/estab_matrix_ids_ver$ver.dta"
		* "$data/fo_new_ver$ver.dta"
		* "$data/model_input_new_ver$ver.dta"
		* "$matlab\data_new\connected_moves_ver$ver.csv"
		* "$matlab\data_new\params_ver$ver.csv"
		do "$prog\09_model_inputs_BeH_skill"
		clear
		capture log close
	}
}

* Halt here so the Matlab step can be run by hand before continuing
stop



********************************************************************************
********************************************************************************
********************************************************************************
*																			   *
* Run the following file in Matlab: estimate_model_skill_v7s.m				   *
* After running Matlab, continue here:										   *
*																			   *
********************************************************************************
********************************************************************************
********************************************************************************



*** Estimating AKM effects
* Rebuild the list of window codes: (center year) times 10 plus (window length).
* A window is kept only if its first year is after 1997 and its last year is before 2019.
local i_list = ""
foreach tw in 3 7 {
	forval y = 1997(1)2019 {
		if `y'-(`tw'-1)/2 > 1997 & `y'+(`tw'-1)/2 < 2019 {
			local addto = `y'*10 + `tw'
			local i_list "`i_list' `addto'"
		}
	}
}
dis "`i_list'"
* Run 10_AKM.do once per window code (plus code 3) and per split sample
foreach i in `i_list' 3 {
	forv s = 1/2 {
		global ver = `i' * 10 + `s'
		do "Set globals.do"
		capture log close
		* Braces around ver are required: without them Stata reads the macro name as
		* ver_ (underscore is a valid name character), which is undefined, so the
		* version id dropped out of the log name and every run overwrote the same log.
		log using "$log/10_AKM_ver${ver}_$S_DATE", text replace
		do "$prog\10_AKM.do"
		clear
		cap log close
	}
}

*** Estimating AKM effects for employer-skill combinations
* Window codes: (center year) times 10 plus (window length); a window is kept only
* if its first year is after 1997 and its last year is before 2019.
local i_list = ""
foreach tw in 3 {
	local half = (`tw' - 1)/2 // number of years on each side of the center year
	forvalues y = 1997/2019 {
		if `y' - `half' > 1997 & `y' + `half' < 2019 {
			local i_list "`i_list' `=`y'*10+`tw''"
		}
	}
}
display "`i_list'"
* Run 11_AKM_skill.do once per window code and per split sample
foreach i in `i_list' { // did not run for 3
	forvalues s = 1/2 {
		global ver = 10 * `i' + `s' // version id: window code followed by the split number
		do "Set globals.do"
		capture log close
		log using "$log/11_AKM_skill_ver${ver}_$S_DATE", text replace
		do "$prog\11_AKM_skill.do"
		clear
		capture log close
	}
}

*** Produce statistics about values and AKM effects etc.
* Hand-picked window codes (center year followed by window length) instead of the full list
local i_list "19993 20023 20053 20083 20113 20143 20173 20017 20087 20157"
* Run 12_analyze_values_BeH.do once per listed code (plus code 3) and per split sample
foreach i in `i_list' 3 {
	forvalues s = 1/2 {
		global ver = 10 * `i' + `s' // version id: window code followed by the split number
		display as text "ver = $ver"
		do "Set globals.do"
		capture log close
		log using "$log/12_analyze_values_BeH_ver${ver}_${S_DATE}", text replace
		** Input:
		* "$matlab\data_new\exp_V_ver$ver.csv"
		* "$data/workers_currentid_year_ver$ver.dta"
		* "$AKM/akm_estab_ver$ver.dta"
		* "$temp/wz.dta"
		** Output:
		* "$data/values_new_ver$ver.dta"
		* "$data/workers_betnr_ver$ver.dta"
		* "$data/firmfe_value_new_ver$ver"
		* "$results/V_fe_p5_2_99_ver$ver.csv"
		* "$graph/V_fe_p5_2_99_ver$ver.pdf"
		* "$results/the_one_ver$ver.dta"
		* "$results/firmfe_value_sector_ver$ver.csv"
		* "$graph/sector_map_ver$ver.pdf"
		* "$graph/sector_map_wo_78_ver$ver.pdf"
		* "$graph/sector_map_wo_78_new_ver$ver.pdf"
		do "$prog\12_analyze_values_BeH.do"
		clear
		capture log close
	}
}

*** Produce statistics about values and AKM effects etc. for employer-skill combinations
* Hand-picked window codes (center year followed by window length 3)
local i_list "19993 20023 20053 20083 20113 20143 20173"
* Run 13_analyze_values_BeH_skill.do once per listed code and per split sample
foreach i in `i_list' {
	forvalues s = 1/2 {
		global ver = 10 * `i' + `s' // version id: window code followed by the split number
		display as text "ver = $ver"
		do "Set globals.do"
		capture log close
		log using "$log/13_analyze_values_BeH_skill_ver${ver}_${S_DATE}", text replace
		* NOTE(review): the file names listed below carry no skill suffix; confirm
		* whether they describe what 13_analyze_values_BeH_skill.do actually reads and writes.
		** Input:
		* "$matlab\data_new\exp_V_ver$ver.csv"
		* "$data/workers_currentid_year_ver$ver.dta"
		* "$AKM/akm_estab_ver$ver.dta"
		* "$temp/wz.dta"
		** Output:
		* "$data/values_new_ver$ver.dta"
		* "$data/workers_betnr_ver$ver.dta"
		* "$data/firmfe_value_new_ver$ver"
		* "$results/V_fe_p5_2_99_ver$ver.csv"
		* "$graph/V_fe_p5_2_99_ver$ver.pdf"
		* "$results/the_one_ver$ver.dta"
		* "$results/firmfe_value_sector_ver$ver.csv"
		* "$graph/sector_map_ver$ver.pdf"
		* "$graph/sector_map_wo_78_ver$ver.pdf"
		* "$graph/sector_map_wo_78_new_ver$ver.pdf"
		do "$prog\13_analyze_values_BeH_skill.do"
		clear
		capture log close
	}
}


* Final clean-up: drop the data in memory and close any log that is still open.
* The previous "cap close" named no built-in Stata command, so capture silently
* swallowed the error and the line never closed a log.
clear
cap log close
